{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "is_executing": false
    }
   },
   "source": [
    "3.13 丢弃法\n",
    "除了前一节介绍的权重衰减以外，深度学习模型常常使用丢弃法（dropout）[1] 来应对过拟合问题。丢弃法有一些不同的变体。本节中提到的丢弃法特指倒置丢弃法（inverted dropout）\n"
   ]
  },
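  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Concretely, with dropout probability $p$, each hidden unit $h_i$ is zeroed out with probability $p$ and otherwise divided by $1-p$, so that its expectation is unchanged:\n",
    "$$ h_i' = \\begin{cases} 0 & \\text{with probability } p, \\\\ \\dfrac{h_i}{1-p} & \\text{otherwise,} \\end{cases} \\qquad E[h_i'] = h_i. $$"
   ]
  },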
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "根据丢弃法的定义，我们可以很容易地实现它。下面的dropout函数将以drop_prob的概率丢弃NDArray输入X中的元素。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "is_executing": true
    }
   },
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "from tensorflow import keras,nn\n",
    "from tensorflow.keras.layers import Dropout,Flatten,Dense\n",
    "import os\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"-1\"   \n",
    "def dropout(X, drop_prob):\n",
    "    assert 0 <= drop_prob <= 1\n",
    "    keep_prob = 1 - drop_prob\n",
    "    # 这种情况下把全部元素都丢弃\n",
    "    if keep_prob == 0:\n",
    "        return tf.zeros_like(X)\n",
    "    #初始mask为一个bool型数组，故需要强制类型转换\n",
    "    mask = tf.keras.backend.random_uniform( X.shape,0,1,dtype=tf.float32)<keep_prob\n",
    "    mask=tf.cast(mask,tf.float32)\n",
    "#     print(mask)\n",
    "    return mask * X / keep_prob"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "pycharm": {
     "is_executing": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf.Tensor(\n",
      "[[ 0.  1.  2.  3.  4.  5.  6.  7.]\n",
      " [ 8.  9. 10. 11. 12. 13. 14. 15.]], shape=(2, 8), dtype=float32)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<tf.Tensor: id=3464671, shape=(2, 8), dtype=float32, numpy=\n",
       "array([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.],\n",
       "       [ 8.,  9., 10., 11., 12., 13., 14., 15.]], dtype=float32)>"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = tf.range(0,16,dtype='float32')\n",
    "X=tf.reshape(X,[2,8])\n",
    "print(X)\n",
    "dropout(X, 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "pycharm": {
     "is_executing": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<tf.Tensor: id=3464684, shape=(2, 8), dtype=float32, numpy=\n",
       "array([[ 0.,  2.,  4.,  0.,  8., 10.,  0.,  0.],\n",
       "       [16., 18.,  0., 22., 24.,  0.,  0., 30.]], dtype=float32)>"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dropout(X, 0.5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "pycharm": {
     "is_executing": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<tf.Tensor: id=3464685, shape=(2, 8), dtype=float32, numpy=\n",
       "array([[0., 0., 0., 0., 0., 0., 0., 0.],\n",
       "       [0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)>"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dropout(X, 1)"
   ]
  },
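  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a quick sanity check (an added illustration, not part of the original experiment), we can verify the expectation-preserving property of inverted dropout: averaging the output over many random masks should approximately recover X."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Average dropout(X, 0.5) over many random masks; because kept elements\n",
    "# are rescaled by 1 / keep_prob, the mean should be close to X itself\n",
    "est = tf.reduce_mean(tf.stack([dropout(X, 0.5) for _ in range(1000)]), axis=0)\n",
    "print(est)"
   ]
  },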
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "3.13.2.1. 定义模型参数¶\n",
    "实验中，我们依然使用“softmax回归的从零开始实现”一节中介绍的Fashion-MNIST数据集。我们将定义一个包含两个隐藏层的多层感知机，其中两个隐藏层的输出个数都是256。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "is_executing": false
    }
   },
   "outputs": [],
   "source": [
    "num_inputs, num_outputs, num_hiddens1, num_hiddens2 = 784, 10, 256, 256\n",
    "batch_size=15\n",
    "(x_train,y_train),(x_test,y_test)=keras.datasets.mnist.load_data()\n",
    "x_train = tf.cast(x_train, tf.float32)\n",
    "x_test=tf.cast(x_test,tf.float32)\n",
    "train_db = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size)\n",
    "test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)\n",
    "# iter用来生成迭代器\n",
    "train_iter = iter(train_db)\n",
    "# next() 返回迭代器的下一个项目\n",
    "sample = next(train_iter)\n",
    "w1 = tf.Variable(tf.random.truncated_normal(shape=(num_inputs, num_hiddens1), stddev=0.1))\n",
    "b1 = tf.Variable(tf.random.truncated_normal([num_hiddens1], stddev=0.1))\n",
    "w2 = tf.Variable(tf.random.truncated_normal(shape=(num_hiddens1, num_hiddens2), stddev=0.1))\n",
    "b2=tf.Variable(tf.random.truncated_normal([num_hiddens2], stddev=0.1))\n",
    "w3 = tf.Variable(tf.random.truncated_normal(shape=(num_hiddens2, num_outputs), stddev=0.1))\n",
    "b3=tf.Variable(tf.random.truncated_normal([num_outputs], stddev=0.1))\n",
    "# params = [W1, b1, W2, b2, W3, b3]"
   ]
  },
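  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can peek at the sample batch to confirm the expected shapes (an added check): each batch holds batch_size images of 28x28 pixels together with their labels."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shapes of one batch of images and labels\n",
    "print(sample[0].shape, sample[1].shape)"
   ]
  },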
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "3.13.2.2. 定义模型\n",
    "下面定义的模型将全连接层和激活函数ReLU串起来，并对每个激活函数的输出使用丢弃法。我们可以分别设置各个层的丢弃概率。通常的建议是把靠近输入层的丢弃概率设得小一点。在这个实验中，我们把第一个隐藏层的丢弃概率设为0.2，把第二个隐藏层的丢弃概率设为0.5。我们可以通过“自动求梯度”一节中介绍的is_training函数来判断运行模式为训练还是测试，并只需在训练模式下使用丢弃法。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def net(X):\n",
    "    X =tf.reshape(X,(-1,num_inputs))\n",
    "#     print(X.shape,w1.shape)\n",
    "    H1 = nn.relu((tf.matmul(X, w1) + b1))\n",
    "    # 只在训练模型时使用丢弃法\n",
    "    H1 = dropout(H1, drop_prob1)  # 在第一层全连接后添加丢弃层\n",
    "    H2 = nn.relu((tf.matmul(H1,w2)+b2))\n",
    "    H2 = dropout(H2, drop_prob2)  # 在第二层全连接后添加丢弃层\n",
    "    return tf.math.softmax( tf.matmul(H2,w3) + b3 )"
   ]
  },
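  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Before training, we can sanity-check the network on the sample batch drawn earlier (an added check; sample comes from the data-loading cell above): the output should contain one row of 10 class probabilities per example, and each row should sum to 1."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Forward pass on one batch without dropout; expect shape (batch_size, num_outputs)\n",
    "y_probe = net(sample[0], is_training=False)\n",
    "print(y_probe.shape)\n",
    "print(tf.reduce_sum(y_probe, axis=1)[:3])  # each softmax row sums to 1"
   ]
  },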
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "pycharm": {
     "is_executing": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 loss: 15.043557\n"
     ]
    },
    {
     "ename": "InvalidArgumentError",
     "evalue": "In[0] mismatch In[1] shape: 28 vs. 784: [15,28,28] [784,256] 0 0 [Op:BatchMatMulV2] name: MatMul/",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mInvalidArgumentError\u001b[0m                      Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-32-26e9bdf51a35>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     24\u001b[0m     \u001b[1;32mfor\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0my\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mtest_db\u001b[0m \u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     25\u001b[0m         \u001b[1;32mwith\u001b[0m \u001b[0mtf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mGradientTape\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mtape\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 26\u001b[1;33m             \u001b[0mH1\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrelu\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmatmul\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mw1\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mb1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     27\u001b[0m             \u001b[1;31m# 只在训练模型时使用丢弃法\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     28\u001b[0m             \u001b[0mH2\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrelu\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmatmul\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mH1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mw2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m+\u001b[0m\u001b[0mb2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mF:\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\util\\dispatch.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m    178\u001b[0m     \u001b[1;34m\"\"\"Call target, and fall back on dispatchers if there is a TypeError.\"\"\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    179\u001b[0m     \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 180\u001b[1;33m       \u001b[1;32mreturn\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    181\u001b[0m     \u001b[1;32mexcept\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mTypeError\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    182\u001b[0m       \u001b[1;31m# Note: convert_to_eager_tensor currently raises a ValueError, not a\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mF:\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\ops\\math_ops.py\u001b[0m in \u001b[0;36mmatmul\u001b[1;34m(a, b, transpose_a, transpose_b, adjoint_a, adjoint_b, a_is_sparse, b_is_sparse, name)\u001b[0m\n\u001b[0;32m   2725\u001b[0m         \u001b[0mb\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mconj\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   2726\u001b[0m         \u001b[0madjoint_b\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2727\u001b[1;33m       \u001b[1;32mreturn\u001b[0m \u001b[0mbatch_mat_mul_fn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mb\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0madj_x\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0madjoint_a\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0madj_y\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0madjoint_b\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   2728\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   2729\u001b[0m     \u001b[1;31m# Neither matmul nor sparse_matmul support adjoint, so we conjugate\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mF:\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\ops\\gen_math_ops.py\u001b[0m in \u001b[0;36mbatch_mat_mul_v2\u001b[1;34m(x, y, adj_x, adj_y, name)\u001b[0m\n\u001b[0;32m   1701\u001b[0m       \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1702\u001b[0m         \u001b[0mmessage\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1703\u001b[1;33m       \u001b[0m_six\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mraise_from\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_core\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_status_to_exception\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0me\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcode\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmessage\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1704\u001b[0m   \u001b[1;31m# Add nodes to the TensorFlow graph.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1705\u001b[0m   \u001b[1;32mif\u001b[0m \u001b[0madj_x\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mF:\\Anaconda3\\lib\\site-packages\\six.py\u001b[0m in \u001b[0;36mraise_from\u001b[1;34m(value, from_value)\u001b[0m\n",
      "\u001b[1;31mInvalidArgumentError\u001b[0m: In[0] mismatch In[1] shape: 28 vs. 784: [15,28,28] [784,256] 0 0 [Op:BatchMatMulV2] name: MatMul/"
     ]
    }
   ],
   "source": [
    "num_epochs, lr, batch_size = 5, 0.5, 256\n",
    "drop_prob1, drop_prob2 = 0.2, 0.5\n",
    "loss_all = 0\n",
    "for epoch in range(num_epochs):  #数据集级别迭代\n",
    "    for step,(x_train,y_train) in enumerate(train_db):  #batch级别迭代\n",
    "        with tf.GradientTape() as tape:\n",
    "#             print(x_train.shape)\n",
    "            y_hat = net(x_train)\n",
    "            loss =tf.reduce_mean(tf.losses.sparse_categorical_crossentropy(y_train,y_hat))#采用交叉熵损失函数\n",
    "            loss_all += loss.numpy()#将loss的张量转化为numpy型用来相加\n",
    "\n",
    "            # compute gradients 进行梯度的计算\n",
    "            grads = tape.gradient(loss, [w1, b1, w2, b2,w3,b3])\n",
    "            w1.assign_sub(grads[0])\n",
    "            b1.assign_sub(grads[1])\n",
    "            w2.assign_sub(grads[2])\n",
    "            b2.assign_sub(grads[3])\n",
    "            w3.assign_sub(grads[4])\n",
    "            b3.assign_sub(grads[5])\n",
    "\n",
    "    print(epoch, 'loss:', loss.numpy())\n",
    "    total_correct, total_number = 0, 0\n",
    "\n",
    "    for x,y in test_db :\n",
    "        with tf.GradientTape() as tape:\n",
    "            H1 = nn.relu((tf.matmul(x, w1) + b1))\n",
    "            # 只在训练模型时使用丢弃法\n",
    "            H2 = nn.relu((tf.matmul(H1,w2)+b2))\n",
    "            y_hat = tf.math.softmax( tf.matmul(H2,w3) + b3 )\n",
    "            y=tf.cast(y,'int64')\n",
    "            correct=acc(y_hat,y)\n",
    "    print(epoch,\"test_acc:\", correct)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "3.13.3 简洁实现\n",
    "在Tensorflow2.0中，我们只需要在全连接层后添加Dropout层并指定丢弃概率。在训练模型时，Dropout层将以指定的丢弃概率随机丢弃上一层的输出元素；在测试模型时（即model.eval()后），Dropout层并不发挥作用。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 60000 samples, validate on 10000 samples\n",
      "Epoch 1/5\n",
      "60000/60000 [==============================] - 2s 39us/sample - loss: 7.0662 - accuracy: 0.7214 - val_loss: 0.6324 - val_accuracy: 0.8947\n",
      "Epoch 2/5\n",
      "60000/60000 [==============================] - 2s 30us/sample - loss: 0.9542 - accuracy: 0.8231 - val_loss: 0.4979 - val_accuracy: 0.9069\n",
      "Epoch 3/5\n",
      "60000/60000 [==============================] - 2s 31us/sample - loss: 0.6399 - accuracy: 0.8674 - val_loss: 0.3951 - val_accuracy: 0.9231\n",
      "Epoch 4/5\n",
      "60000/60000 [==============================] - 2s 30us/sample - loss: 0.4849 - accuracy: 0.8937 - val_loss: 0.3139 - val_accuracy: 0.9328\n",
      "Epoch 5/5\n",
      "60000/60000 [==============================] - 2s 30us/sample - loss: 0.4048 - accuracy: 0.9066 - val_loss: 0.2716 - val_accuracy: 0.9424\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<tensorflow.python.keras.callbacks.History at 0x509d6ef390>"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = keras.Sequential([\n",
    "    keras.layers.Flatten(input_shape=(28, 28)),\n",
    "    keras.layers.Dense(256,activation='relu'),\n",
    "    Dropout(0.2),\n",
    "    keras.layers.Dense(256,activation='relu'),\n",
    "    Dropout(0.5),\n",
    "    keras.layers.Dense(10,activation=tf.nn.softmax)\n",
    "])\n",
    "model.compile(optimizer=tf.keras.optimizers.Adam(),\n",
    "              loss = 'sparse_categorical_crossentropy',\n",
    "              metrics=['accuracy'])\n",
    "model.fit(x_train,y_train,epochs=5,batch_size=256,validation_data=(x_test, y_test),\n",
    "                    validation_freq=1)\n"
   ]
  },
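  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can confirm the training/test behaviour of the Keras Dropout layer by calling the model with the training flag set explicitly (an added illustration): with training=False the output is deterministic, while training=True re-randomizes the dropout masks on every call."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dropout layers are only active when training=True is passed\n",
    "x_batch = x_train[:4]\n",
    "out_eval = model(x_batch, training=False)\n",
    "out_train = model(x_batch, training=True)\n",
    "# Identical when dropout is off, different when it is on\n",
    "print(tf.reduce_max(tf.abs(out_eval - model(x_batch, training=False))).numpy())\n",
    "print(tf.reduce_max(tf.abs(out_eval - out_train)).numpy())"
   ]
  },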
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "小结\n",
    "我们可以通过使用丢弃法应对过拟合。\n",
    "丢弃法只在训练模型时使用。\n",
    "\n",
    "\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  },
  "pycharm": {
   "stem_cell": {
    "cell_type": "raw",
    "metadata": {
     "collapsed": false
    },
    "source": []
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
